from pyspark import SparkConf, SparkContext
import os

# Point Spark workers at the same Python interpreter as the driver
# (required on Windows when multiple Pythons are installed).
os.environ['PYSPARK_PYTHON'] = "C:/Python310/python.exe"

# Run Spark locally, using all available CPU cores.
conf = SparkConf().setMaster("local[*]").setAppName("test_spark")
sc = SparkContext(conf=conf)

try:
    # Prepare data: (gender, age) key-value pairs.
    rdd = sc.parallelize([("男", 20), ("男", 30), ("女", 33), ("女", 44)])
    # Sum the values per key, i.e. total age per gender.
    rdd = rdd.reduceByKey(lambda a, b: a + b)
    print(rdd.collect())
finally:
    # Always stop the context so the backing JVM process shuts down
    # cleanly instead of being leaked when the script exits.
    sc.stop()